import pandas as pd
from gensim import corpora, models

# Number of topics extracted from each sentiment corpus.
NUM_TOPICS = 3


def _load_nouns(path):
    """Read a segmentation-result CSV and keep the rows whose nature is 'n'.

    Rows whose 'word' cell was parsed as missing are dropped and the
    remaining words are converted to ``str``, so every token handed to
    gensim is a string.

    Args:
        path: Path of a UTF-8 CSV file with 'word' and 'nature' columns.

    Returns:
        The filtered DataFrame.
    """
    frame = pd.read_csv(path, encoding='utf-8')
    nouns = frame[frame['nature'] == 'n']
    # pandas turns cells such as "NA"/"null"/"" into float NaN by default;
    # a non-string token would make the gensim dictionary step fail.
    nouns = nouns.dropna(subset=['word']).copy()
    nouns['word'] = nouns['word'].astype(str)
    return nouns


def _train_lda(words, num_topics=NUM_TOPICS):
    """Build a dictionary and bag-of-words corpus from *words* and fit LDA.

    Each word is treated as its own one-token document.

    Args:
        words: Iterable of string tokens.
        num_topics: Number of topics to fit.

    Returns:
        A ``(dictionary, corpus, lda_model)`` tuple.

    Raises:
        ValueError: If *words* is empty.
    """
    # Build the document list once and reuse it for both the dictionary
    # and the corpus.
    documents = [[word] for word in words]
    if not documents:
        # Fail early with a clear message instead of training on nothing.
        raise ValueError("no words with nature 'n' to train an LDA model on")
    dictionary = corpora.Dictionary(documents)
    corpus = [dictionary.doc2bow(doc) for doc in documents]
    lda = models.LdaModel(corpus=corpus, id2word=dictionary,
                          num_topics=num_topics)
    return dictionary, corpus, lda


def _print_topics(title, lda):
    """Print *title* followed by every topic of *lda*, numbered from 1."""
    print(title)
    for idx, topic in lda.print_topics(-1):
        print("主题: {} \n词汇: {}".format(idx + 1, topic))


# Load the data, keeping only rows whose nature is 'n'.
posdata = _load_nouns("pos_result.csv")
negdata = _load_nouns("neg_result.csv")

# Build the dictionary and corpus, then train the LDA model
# (positive first, then negative).
pos_dict, pos_corpus, pos_lda = _train_lda(posdata['word'])
neg_dict, neg_corpus, neg_lda = _train_lda(negdata['word'])

# Extract and print the topics.
_print_topics("正面数据的主题:", pos_lda)
_print_topics("\n负面数据的主题:", neg_lda)
